# cross-country equity markets by founding dates of large companies
# Uses data from S&P Capital IQ to calculate equity market concentration measures by country

# First, load all the data

library(readxl)
library(dplyr)
library(tidyr)
# NOTE: the relative paths used by this script (e.g. "./Data") are resolved
# against this project directory.
setwd('G:/SM/Data/Equities/Long run data/Long run data project/RDP 2019/Company age')

# list.files() interprets `pattern` as a regular expression, not a shell glob.
# Translate the intended glob explicitly so that only files whose names START
# with "Equity Screening Report" are picked up.
files <- list.files(
  path = "./Data",
  pattern = utils::glob2rx("Equity Screening Report*")
)

# Fail early with a clear message instead of an obscure error further down
# when no input file is found.
if (length(files) == 0) {
  stop("No 'Equity Screening Report*' files found in ./Data", call. = FALSE)
}

# Read the "Screening" sheet of every report (skipping the first 7 rows) and
# stack the results into a single table.
sp_data <- lapply(
  file.path("./Data", files),
  function(path) read_excel(path, sheet = "Screening", skip = 7)
)

sp_data <- do.call(rbind, sp_data) %>%
  rename(Market.Capitalisation = `Market Capitalization [My Setting] [Latest] (AUDmm, Historical rate)`)

# Strip thousands separators before converting to numeric; anything that still
# cannot be parsed becomes NA (with a warning from as.numeric()).
sp_data$Market.Capitalisation <- as.numeric(
  gsub(",", "", sp_data$Market.Capitalisation, fixed = TRUE)
)
sp_data$`Year Founded` <- as.numeric(sp_data$`Year Founded`)

# Manual corrections for the two dual-listed miners ----

# Rio Tinto (ASX:RIO)
# Founding year: S&P have 1879 in the UK (the actual founding date) and 1959
# in Australia (when it moved here). We use 1905, the founding of Consolidated
# Zinc, which Rio merged with when it began its Australian operations; we think
# that makes more sense as the age of the current entity called Rio Tinto in
# Australia.
# Market cap: S&P has the full market cap, not the share listed in Australia.
sp_data$`Year Founded`[sp_data$Ticker %in% "ASX:RIO"] <- 1905
sp_data$Market.Capitalisation[sp_data$Ticker %in% "ASX:RIO"] <- 37122

# BHP (ASX:BHP)
# Founding year: not clear where S&P got 1850 from (BHP is 1878 and Billiton
# 1860). We go with 1885 since that's the date the Australian one was founded.
# Market cap: same dual-listing adjustment as for Rio Tinto.
sp_data$`Year Founded`[sp_data$Ticker %in% "ASX:BHP"] <- 1885
sp_data$Market.Capitalisation[sp_data$Ticker %in% "ASX:BHP"] <- 110590


# Age histogram ----
#
# Builds a table with one row per founding decade and one column per
# (Australia / Other) x (top100 / other) combination. Each value is that
# cell's market capitalisation as a percentage of the total capitalisation of
# its Australia-or-Other group. The result is written to 'age Aus others.csv'.

age_histo <- sp_data %>%
  # Drop China-listed securities (rows with a missing country are dropped too,
  # since the comparison yields NA).
  filter(`Exchange Country` != "China") %>%
  # Keep active primary listings of the selected security types that have a
  # market capitalisation.
  filter(
    !is.na(Market.Capitalisation),
    grepl("Primary Listing", `Equity Security Features`),
    `Trading Status` == "Active",
    `Equity Security Type` %in%
      c("Common Stock", "Debt/Equity Composite Units", "Preferred Stock")
  ) %>%
  # Companies without a founding year cannot be placed in a decade.
  filter(!is.na(`Year Founded`)) %>%
  mutate(
    Australian = ifelse(`Exchange Country` == "Australia", "Australia", "Other"),
    # Round the founding year down to the start of its decade.
    Decade.Raw = `Year Founded` - `Year Founded` %% 10,
    # Everything before 1800 is pooled into one bucket; ifelse() coerces the
    # remaining decades to character.
    Decade.Founded = ifelse(Decade.Raw < 1800, "<1800", Decade.Raw)
  ) %>%
  # Rank by market cap within each exchange country (largest = 1), among the
  # rows that survived the filters above.
  group_by(`Exchange Country`) %>%
  mutate(
    # Ties are broken by row order rather than at random, so that top-100
    # membership (and hence the output file) is reproducible between runs.
    cap_rank = rank(-Market.Capitalisation, ties.method = "first"),
    top100 = ifelse(cap_rank <= 100, "top100", "other")
  ) %>%
  group_by(Australian, top100, Decade.Founded) %>%
  summarise(cap = sum(Market.Capitalisation, na.rm = TRUE)) %>%
  # Express each cell as a percentage of its Australia / Other total.
  group_by(Australian) %>%
  mutate(cap = cap / sum(cap, na.rm = TRUE) * 100) %>%
  ungroup() %>%
  mutate(colname = paste(Australian, top100, sep = "_")) %>%
  select(-Australian, -top100) %>%
  # Reshape to wide form; decade/column combinations with no data are NA.
  spread(colname, cap)

write.csv(age_histo, 'age Aus others.csv', row.names = FALSE)